"""Compute the cosine similarity between two short Chinese sentences.

Pipeline: tokenize with jieba -> build a gensim bag-of-words corpus ->
query a gensim Similarity index with one sentence and read off its
similarity against the other.
"""
import jieba
from gensim import corpora
from gensim.similarities import Similarity

sent1 = "申请一个QQ邮箱"
sent2 = "注册一个QQ邮箱"

sents = [sent1, sent2]
# jieba.lcut already returns a list of tokens, so no wrapping
# comprehension is needed to copy it.
texts = [jieba.lcut(sent) for sent in sents]
print(texts)

# Dictionary maps each distinct token to an integer id.
dictionary = corpora.Dictionary(texts)

# Bag-of-words model: each sentence becomes a list of (token_id, count) pairs.
corpus = [dictionary.doc2bow(text) for text in texts]
# First argument is an on-disk prefix where gensim stores the index shards.
similarity = Similarity('-Similarity-index', corpus, num_features=len(dictionary))
print(similarity)

# Query the index with sentence 1. The result row holds similarities to
# every indexed document; element [1] is the similarity to sentence 2
# (element [0] would be sentence 1 against itself, i.e. 1.0).
new_sentence = sent1
test_corpus_1 = dictionary.doc2bow(jieba.lcut(new_sentence))

cosine_sim = similarity[test_corpus_1][1]
print("利用gensim计算得到俩个句子的相似度：%.4f。" % cosine_sim)